/**
 * 
 */
package ca.uwindsor.cs.deepweb.utility.wikipedia;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;

/**
 * Scans a directory for <code>.txt</code> files and moves every file whose
 * second line starts with {@code &gt} into a <code>removed</code>
 * sub-directory of that directory. All other files are left where they are.
 * <p>
 * NOTE(review): judging by the class name these files are presumably
 * non-article pages of a Chinese Wikipedia dump; the code itself only tests
 * the prefix of the second line.
 * 
 * @author Liang Jie
 * 
 */
public class CnWikiNoneArticleRemover {
	/**
	 * Reader for the file currently being inspected. It is reassigned for
	 * every file; its underlying stream is closed again before the file is
	 * moved.
	 */
	protected BufferedReader bufferedreader_in;

	/**
	 * The directory which contains the files to be checked
	 */
	protected File dataDir;

	/**
	 * Counter set to 1 by the constructor. It is not read anywhere in this
	 * class.
	 */
	protected int offset;

	/**
	 * Creates a remover for the given directory. The path is not validated
	 * here; {@link #go()} reports a path that cannot be listed.
	 * 
	 * @param dirpath
	 *            path of the directory which contains the files to be checked
	 */
	public CnWikiNoneArticleRemover(String dirpath) {
		offset = 1;
		dataDir = new File(dirpath);
	}

	/**
	 * Inspects every regular <code>.txt</code> file directly inside
	 * {@link #dataDir} and moves those whose second line starts with
	 * {@code &gt} into the <code>removed</code> sub-directory, which is
	 * created on demand.
	 * <p>
	 * Files with fewer than two lines are left untouched. If the directory
	 * cannot be listed, a message is written to standard error and the method
	 * returns without doing anything. A file that cannot be opened terminates
	 * the JVM with exit code -1; any other I/O error while reading terminates
	 * it with exit code -2.
	 */
	public void go() {
		File[] dataFiles = dataDir.listFiles();
		if (dataFiles == null) {
			// listFiles() yields null when the path is not a directory or
			// cannot be read.
			System.err.println("Cannot list directory: " + dataDir);
			return;
		}

		System.out.println("Start to process the files!");
		File removedDir = new File(dataDir, "removed");

		for (int i = 0; i < dataFiles.length; i++) {
			File candidate = dataFiles[i];
			if (!candidate.isFile() || !candidate.getName().endsWith(".txt")) {
				continue;
			}

			String secondLine = null;
			try {
				secondLine = readSecondLine(candidate);
			} catch (FileNotFoundException e) {
				e.printStackTrace();
				System.err.println("File not found");
				System.exit(-1);
			} catch (UnsupportedEncodingException e) {
				e.printStackTrace();
				System.err.println("error encoding");
				System.exit(-2);
			} catch (IOException e) {
				e.printStackTrace();
				System.exit(-2);
			}

			// A null line means the file has fewer than two lines; such a
			// file cannot carry the marker, so it stays in place.
			if (secondLine == null || !secondLine.startsWith("&gt")) {
				continue;
			}

			// renameTo() does not create missing parent directories.
			if (!removedDir.isDirectory() && !removedDir.mkdirs()) {
				System.err.println("Cannot create directory: " + removedDir);
				continue;
			}

			File target = new File(removedDir, candidate.getName());
			if (candidate.renameTo(target)) {
				System.out.println("removed " + candidate);
			} else {
				System.err.println("Could not move " + candidate + " to "
						+ target);
			}
		}
	}

	/**
	 * Reads the second line of a UTF-8 encoded text file. The stream is
	 * always closed before this method returns, so the caller may rename the
	 * file afterwards.
	 * 
	 * @param file
	 *            the file to read
	 * @return the second line, or <code>null</code> if the file has fewer
	 *         than two lines
	 * @throws IOException
	 *             if the file cannot be opened or read
	 */
	private String readSecondLine(File file) throws IOException {
		FileInputStream stream = new FileInputStream(file);
		try {
			bufferedreader_in = new BufferedReader(new InputStreamReader(
					stream, "UTF-8"));
			bufferedreader_in.readLine();
			return bufferedreader_in.readLine();
		} finally {
			stream.close();
		}
	}

	/**
	 * Command line entry point. Exits with code -1 unless exactly one
	 * argument is given.
	 * 
	 * @param args
	 *            a single element: the directory to process
	 */
	public static void main(String[] args) {
		if (args.length != 1) {
			System.err.println("Usage: CnWikiNoneArticleRemover <directory>");
			System.exit(-1);
		}
		CnWikiNoneArticleRemover remover = new CnWikiNoneArticleRemover(
				args[0]);
		remover.go();
	}

}
